{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "collapsed_sections": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU",
    "gpuClass": "standard"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "[<img align=\"left\" alt=\"Open In Colab\" src=\"https://colab.research.google.com/assets/colab-badge.svg\">](https://colab.research.google.com/github/sail-sg/envpool/blob/main/demo/envpool_demo_pong.ipynb)"
      ],
      "metadata": {
        "id": "wSww-5lcUxqN"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "YMhfVuVIQQd9"
      },
      "outputs": [],
      "source": [
        "# OS: Ubuntu 18.04.4 LTS x86_64\n",
        "# Kernel: 4.18.0-15-generic\n",
        "# CPU: Intel(R) Core(TM) i9-10920X CPU (24) @ 3.50GHz\n",
        "# GPU: NVIDIA GeForce RTX 2080 Ti\n",
        "\n",
        "# System packages: Mesa GL / GLEW / OSMesa development libraries, then patchelf.\n",
        "!apt-get install -y \\\n",
        "    libgl1-mesa-dev \\\n",
        "    libgl1-mesa-glx \\\n",
        "    libglew-dev \\\n",
        "    libosmesa6-dev \\\n",
        "    software-properties-common\n",
        "\n",
        "!apt-get install -y patchelf\n",
        "\n",
        "# Python packages. %pip is used instead of !pip so that the packages are\n",
        "# installed into the environment of the kernel running this notebook.\n",
        "%pip install git+https://github.com/Denys88/rl_games\n",
        "%pip install envpool\n",
        "%pip install gym\n",
        "\n",
        "%pip install free-mujoco-py\n",
        "# stdout and stderr of this apt-get call are discarded.\n",
        "!apt-get install -y xvfb python-opengl ffmpeg > /dev/null 2>&1\n",
        "# imageio is pinned to 2.4.1; colabgymrender is upgraded to its latest release.\n",
        "%pip install imageio==2.4.1\n",
        "%pip install -U colabgymrender"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "from rl_games.torch_runner import Runner"
      ],
      "metadata": {
        "id": "weGrLVCFTKdD"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "## pong_envpool config:\n",
        "pong_config = {'params': {'seed': 322, \n",
        "                          'algo': {'name': 'a2c_discrete'}, \n",
        "                          'model': {'name': 'discrete_a2c'}, \n",
        "                          'network': {'name': 'actor_critic', \n",
        "                                      'separate': False, \n",
        "                                      'space': {'discrete': None}, \n",
        "                                      'cnn': {'permute_input': False, \n",
        "                                              'type': 'conv2d', \n",
        "                                              'activation': 'elu', \n",
        "                                              'initializer': {'name': 'default'}, \n",
        "                                              'regularizer': {'name': 'None'}, \n",
        "                                              'convs': [{'filters': 32, 'kernel_size': 8, 'strides': 4, 'padding': 0}, \n",
        "                                                        {'filters': 64, 'kernel_size': 4, 'strides': 2, 'padding': 0}, \n",
        "                                                        {'filters': 64, 'kernel_size': 3, 'strides': 1, 'padding': 0}]}, \n",
        "                                      'mlp': {'units': [512], 'activation': 'elu', 'initializer': {'name': 'orthogonal_initializer', 'gain': 1.41421356237}}}, \n",
        "                          'config': {'name': 'Pong-v5_envpool', \n",
        "                                     'env_name': 'envpool', \n",
        "                                     'score_to_win': 20.0, \n",
        "                                     'normalize_value': True, \n",
        "                                     'normalize_input': True, \n",
        "                                     'reward_shaper': {'min_val': -1, 'max_val': 1}, \n",
        "                                     'normalize_advantage': True, \n",
        "                                     'gamma': 0.99, \n",
        "                                     'tau': 0.95, \n",
        "                                     'learning_rate': '3e-4', \n",
        "                                     'lr_schedule': 'adaptive', \n",
        "                                     'kl_threshold': 0.01, \n",
        "                                     'grad_norm': 1.0, \n",
        "                                     'entropy_coef': 0.01, \n",
        "                                     'truncate_grads': True, \n",
        "                                     'e_clip': 0.2, \n",
        "                                     'clip_value': False, \n",
        "                                     'num_actors': 64, \n",
        "                                     'horizon_length': 128, \n",
        "                                     'minibatch_size': 2048, \n",
        "                                     'mini_epochs': 4, \n",
        "                                     'critic_coef': 2, \n",
        "                                     'max_epochs': 500, \n",
        "                                     'use_diagnostics': False, \n",
        "                                     'env_config': {'env_name': 'Pong-v5', 'has_lives': False}, \n",
        "                                     'player': {'render': False, 'games_num': 100, 'n_game_life': 1, 'determenistic': True}}}}"
      ],
      "metadata": {
        "id": "jghnij1BTOiH"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Create the rl_games runner, load the Pong config into it, and run it in training mode.\n",
        "runner = Runner()\n",
        "runner.load(pong_config)\n",
        "runner.run(dict(train=True))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "w-DzDWZGUxQW",
        "outputId": "493b9918-eda9-4a52-c94b-7bedbee1c558"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "self.seed = 322\n",
            "Started to train\n",
            "current training device: cuda:0\n",
            "conv_name: conv2d\n",
            "build mlp: 3136\n",
            "RunningMeanStd:  (1,)\n",
            "RunningMeanStd:  (4, 84, 84)\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/home/liubo/.local/lib/python3.8/site-packages/rl_games/common/experience.py:341: DeprecationWarning: `np.long` is a deprecated alias for `np.compat.long`. To silence this warning, use `np.compat.long` by itself. In the likely event your code does not need to work on Python 2 you can use the builtin `int` for which `np.compat.long` is itself an alias. Doing this will not modify any behaviour and is safe. When replacing `np.long`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n",
            "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
            "  self.tensor_dict['actions'] = self._create_tensor_from_space(gym.spaces.Box(low=0, high=1,shape=self.actions_shape, dtype=np.long), obs_base_shape)\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "fps step: 11849 fps step and policy inference: 9092 fps total: 5652 epoch: 1/500\n",
            "fps step: 11318 fps step and policy inference: 8902 fps total: 6340 epoch: 2/500\n",
            "fps step: 10767 fps step and policy inference: 8437 fps total: 6099 epoch: 3/500\n",
            "fps step: 10548 fps step and policy inference: 8191 fps total: 5964 epoch: 4/500\n",
            "fps step: 9426 fps step and policy inference: 7164 fps total: 5403 epoch: 5/500\n",
            "fps step: 10008 fps step and policy inference: 7780 fps total: 5743 epoch: 6/500\n",
            "fps step: 7491 fps step and policy inference: 6021 fps total: 4720 epoch: 7/500\n",
            "fps step: 6301 fps step and policy inference: 5112 fps total: 4146 epoch: 8/500\n",
            "fps step: 9080 fps step and policy inference: 7118 fps total: 5373 epoch: 9/500\n",
            "fps step: 11450 fps step and policy inference: 9020 fps total: 6388 epoch: 10/500\n",
            "fps step: 10282 fps step and policy inference: 7925 fps total: 5822 epoch: 11/500\n",
            "fps step: 9751 fps step and policy inference: 7514 fps total: 5595 epoch: 12/500\n",
            "fps step: 9726 fps step and policy inference: 7670 fps total: 5677 epoch: 13/500\n",
            "fps step: 8260 fps step and policy inference: 6258 fps total: 4860 epoch: 14/500\n",
            "fps step: 7061 fps step and policy inference: 5648 fps total: 4489 epoch: 15/500\n",
            "fps step: 7864 fps step and policy inference: 6121 fps total: 4782 epoch: 16/500\n",
            "fps step: 8639 fps step and policy inference: 7048 fps total: 5329 epoch: 17/500\n",
            "fps step: 9205 fps step and policy inference: 7044 fps total: 5322 epoch: 18/500\n",
            "fps step: 9991 fps step and policy inference: 7796 fps total: 5744 epoch: 19/500\n",
            "fps step: 9746 fps step and policy inference: 7539 fps total: 5602 epoch: 20/500\n",
            "fps step: 11440 fps step and policy inference: 8887 fps total: 6313 epoch: 21/500\n",
            "fps step: 10600 fps step and policy inference: 8218 fps total: 5961 epoch: 22/500\n",
            "fps step: 10379 fps step and policy inference: 8049 fps total: 5880 epoch: 23/500\n",
            "fps step: 10150 fps step and policy inference: 7868 fps total: 5784 epoch: 24/500\n",
            "fps step: 10588 fps step and policy inference: 8375 fps total: 6049 epoch: 25/500\n",
            "fps step: 9702 fps step and policy inference: 7661 fps total: 5661 epoch: 26/500\n",
            "fps step: 9213 fps step and policy inference: 7453 fps total: 5550 epoch: 27/500\n",
            "fps step: 8447 fps step and policy inference: 6594 fps total: 5057 epoch: 28/500\n",
            "fps step: 8010 fps step and policy inference: 6259 fps total: 4853 epoch: 29/500\n",
            "fps step: 8213 fps step and policy inference: 6496 fps total: 5000 epoch: 30/500\n",
            "fps step: 8425 fps step and policy inference: 6553 fps total: 5033 epoch: 31/500\n",
            "fps step: 9779 fps step and policy inference: 7705 fps total: 5685 epoch: 32/500\n",
            "fps step: 9767 fps step and policy inference: 7593 fps total: 5625 epoch: 33/500\n",
            "fps step: 9725 fps step and policy inference: 7656 fps total: 5657 epoch: 34/500\n",
            "fps step: 9525 fps step and policy inference: 7398 fps total: 5516 epoch: 35/500\n",
            "fps step: 9963 fps step and policy inference: 7631 fps total: 5643 epoch: 36/500\n",
            "fps step: 11256 fps step and policy inference: 8859 fps total: 6290 epoch: 37/500\n",
            "fps step: 10843 fps step and policy inference: 8541 fps total: 6131 epoch: 38/500\n",
            "fps step: 11956 fps step and policy inference: 9404 fps total: 6560 epoch: 39/500\n",
            "fps step: 10633 fps step and policy inference: 8435 fps total: 6074 epoch: 40/500\n",
            "fps step: 11297 fps step and policy inference: 8843 fps total: 6283 epoch: 41/500\n",
            "fps step: 9894 fps step and policy inference: 7692 fps total: 5685 epoch: 42/500\n",
            "fps step: 11051 fps step and policy inference: 8612 fps total: 6169 epoch: 43/500\n",
            "fps step: 9465 fps step and policy inference: 7364 fps total: 5495 epoch: 44/500\n",
            "fps step: 11128 fps step and policy inference: 8773 fps total: 6246 epoch: 45/500\n",
            "fps step: 10391 fps step and policy inference: 8104 fps total: 5901 epoch: 46/500\n",
            "fps step: 9384 fps step and policy inference: 7361 fps total: 5493 epoch: 47/500\n",
            "fps step: 8734 fps step and policy inference: 6835 fps total: 5192 epoch: 48/500\n",
            "fps step: 8777 fps step and policy inference: 6818 fps total: 5181 epoch: 49/500\n",
            "fps step: 8144 fps step and policy inference: 6411 fps total: 4944 epoch: 50/500\n",
            "fps step: 8705 fps step and policy inference: 6878 fps total: 5209 epoch: 51/500\n",
            "fps step: 9128 fps step and policy inference: 7180 fps total: 5381 epoch: 52/500\n",
            "fps step: 9166 fps step and policy inference: 7276 fps total: 5435 epoch: 53/500\n",
            "fps step: 9397 fps step and policy inference: 7263 fps total: 5433 epoch: 54/500\n",
            "fps step: 8347 fps step and policy inference: 6542 fps total: 5019 epoch: 55/500\n",
            "fps step: 9542 fps step and policy inference: 7383 fps total: 5503 epoch: 56/500\n",
            "fps step: 10014 fps step and policy inference: 7747 fps total: 5701 epoch: 57/500\n",
            "fps step: 10784 fps step and policy inference: 8512 fps total: 6104 epoch: 58/500\n",
            "fps step: 10798 fps step and policy inference: 8435 fps total: 6062 epoch: 59/500\n",
            "fps step: 11156 fps step and policy inference: 8602 fps total: 6147 epoch: 60/500\n",
            "fps step: 11073 fps step and policy inference: 8710 fps total: 6205 epoch: 61/500\n",
            "fps step: 9472 fps step and policy inference: 7175 fps total: 5378 epoch: 62/500\n",
            "fps step: 9861 fps step and policy inference: 7774 fps total: 5710 epoch: 63/500\n",
            "fps step: 10183 fps step and policy inference: 7988 fps total: 5826 epoch: 64/500\n",
            "fps step: 8558 fps step and policy inference: 6478 fps total: 4977 epoch: 65/500\n",
            "fps step: 8705 fps step and policy inference: 6849 fps total: 5199 epoch: 66/500\n",
            "fps step: 8354 fps step and policy inference: 6625 fps total: 5069 epoch: 67/500\n",
            "fps step: 7300 fps step and policy inference: 5627 fps total: 4462 epoch: 68/500\n",
            "fps step: 8379 fps step and policy inference: 6438 fps total: 4957 epoch: 69/500\n",
            "fps step: 8769 fps step and policy inference: 6878 fps total: 5214 epoch: 70/500\n",
            "fps step: 9155 fps step and policy inference: 7201 fps total: 5399 epoch: 71/500\n",
            "fps step: 9007 fps step and policy inference: 6941 fps total: 5250 epoch: 72/500\n",
            "fps step: 9948 fps step and policy inference: 7777 fps total: 5717 epoch: 73/500\n",
            "fps step: 10541 fps step and policy inference: 8192 fps total: 5941 epoch: 74/500\n",
            "fps step: 10072 fps step and policy inference: 7927 fps total: 5797 epoch: 75/500\n",
            "fps step: 10543 fps step and policy inference: 8325 fps total: 6001 epoch: 76/500\n",
            "fps step: 9620 fps step and policy inference: 7411 fps total: 5510 epoch: 77/500\n",
            "fps step: 10720 fps step and policy inference: 8471 fps total: 6074 epoch: 78/500\n",
            "fps step: 8939 fps step and policy inference: 6890 fps total: 5219 epoch: 79/500\n",
            "fps step: 10368 fps step and policy inference: 8113 fps total: 5893 epoch: 80/500\n",
            "fps step: 7668 fps step and policy inference: 5899 fps total: 4632 epoch: 81/500\n",
            "fps step: 9229 fps step and policy inference: 7332 fps total: 5469 epoch: 82/500\n",
            "fps step: 8178 fps step and policy inference: 6256 fps total: 4848 epoch: 83/500\n",
            "fps step: 9179 fps step and policy inference: 7317 fps total: 5461 epoch: 84/500\n",
            "fps step: 7815 fps step and policy inference: 6077 fps total: 4741 epoch: 85/500\n",
            "fps step: 8977 fps step and policy inference: 6960 fps total: 5264 epoch: 86/500\n",
            "fps step: 8373 fps step and policy inference: 6560 fps total: 5027 epoch: 87/500\n",
            "fps step: 10279 fps step and policy inference: 7953 fps total: 5809 epoch: 88/500\n",
            "fps step: 10166 fps step and policy inference: 7964 fps total: 5814 epoch: 89/500\n",
            "fps step: 9232 fps step and policy inference: 7118 fps total: 5349 epoch: 90/500\n",
            "fps step: 8951 fps step and policy inference: 6815 fps total: 5172 epoch: 91/500\n",
            "fps step: 9574 fps step and policy inference: 7522 fps total: 5577 epoch: 92/500\n",
            "fps step: 10061 fps step and policy inference: 7971 fps total: 5817 epoch: 93/500\n",
            "fps step: 9071 fps step and policy inference: 6990 fps total: 5276 epoch: 94/500\n",
            "fps step: 8400 fps step and policy inference: 6653 fps total: 5082 epoch: 95/500\n",
            "fps step: 8288 fps step and policy inference: 6619 fps total: 5062 epoch: 96/500\n",
            "fps step: 10456 fps step and policy inference: 8152 fps total: 5905 epoch: 97/500\n",
            "fps step: 9020 fps step and policy inference: 7077 fps total: 5329 epoch: 98/500\n",
            "fps step: 7633 fps step and policy inference: 6008 fps total: 4694 epoch: 99/500\n",
            "fps step: 8805 fps step and policy inference: 6885 fps total: 5218 epoch: 100/500\n",
            "saving next best rewards:  [14.216911]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 7531 fps step and policy inference: 5608 fps total: 4441 epoch: 101/500\n",
            "saving next best rewards:  [14.405285]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9184 fps step and policy inference: 7164 fps total: 5369 epoch: 102/500\n",
            "saving next best rewards:  [14.5568285]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 10136 fps step and policy inference: 7790 fps total: 5724 epoch: 103/500\n",
            "saving next best rewards:  [14.601259]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9620 fps step and policy inference: 7463 fps total: 5540 epoch: 104/500\n",
            "saving next best rewards:  [14.688693]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9927 fps step and policy inference: 7709 fps total: 5673 epoch: 105/500\n",
            "saving next best rewards:  [14.764588]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9168 fps step and policy inference: 7149 fps total: 5368 epoch: 106/500\n",
            "saving next best rewards:  [14.931184]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9774 fps step and policy inference: 7435 fps total: 5521 epoch: 107/500\n",
            "saving next best rewards:  [14.951872]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9036 fps step and policy inference: 6879 fps total: 5211 epoch: 108/500\n",
            "saving next best rewards:  [15.092006]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9841 fps step and policy inference: 7708 fps total: 5670 epoch: 109/500\n",
            "saving next best rewards:  [15.322743]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 7745 fps step and policy inference: 5965 fps total: 4672 epoch: 110/500\n",
            "saving next best rewards:  [15.607838]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9167 fps step and policy inference: 7029 fps total: 5297 epoch: 111/500\n",
            "saving next best rewards:  [15.718489]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 8733 fps step and policy inference: 6742 fps total: 5131 epoch: 112/500\n",
            "saving next best rewards:  [15.91862]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 7613 fps step and policy inference: 6036 fps total: 4714 epoch: 113/500\n",
            "saving next best rewards:  [16.332552]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 8786 fps step and policy inference: 6768 fps total: 5148 epoch: 114/500\n",
            "saving next best rewards:  [16.428352]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9201 fps step and policy inference: 7167 fps total: 5375 epoch: 115/500\n",
            "saving next best rewards:  [16.499426]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 7106 fps step and policy inference: 5372 fps total: 4298 epoch: 116/500\n",
            "saving next best rewards:  [16.68881]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 10166 fps step and policy inference: 8030 fps total: 5849 epoch: 117/500\n",
            "saving next best rewards:  [16.819283]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9098 fps step and policy inference: 6879 fps total: 5207 epoch: 118/500\n",
            "fps step: 9942 fps step and policy inference: 7776 fps total: 5708 epoch: 119/500\n",
            "saving next best rewards:  [16.85109]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 10083 fps step and policy inference: 7929 fps total: 5792 epoch: 120/500\n",
            "saving next best rewards:  [16.96527]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 10931 fps step and policy inference: 8568 fps total: 6126 epoch: 121/500\n",
            "saving next best rewards:  [16.985617]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9287 fps step and policy inference: 7354 fps total: 5479 epoch: 122/500\n",
            "saving next best rewards:  [17.13335]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9703 fps step and policy inference: 7468 fps total: 5543 epoch: 123/500\n",
            "saving next best rewards:  [17.301378]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 7415 fps step and policy inference: 5631 fps total: 4459 epoch: 124/500\n",
            "saving next best rewards:  [17.41403]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9389 fps step and policy inference: 7418 fps total: 5515 epoch: 125/500\n",
            "saving next best rewards:  [17.585842]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9387 fps step and policy inference: 7244 fps total: 5411 epoch: 126/500\n",
            "saving next best rewards:  [17.633883]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 7779 fps step and policy inference: 6017 fps total: 4700 epoch: 127/500\n",
            "saving next best rewards:  [17.794622]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9073 fps step and policy inference: 6975 fps total: 5265 epoch: 128/500\n",
            "saving next best rewards:  [17.860123]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 8957 fps step and policy inference: 7022 fps total: 5292 epoch: 129/500\n",
            "saving next best rewards:  [17.985346]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 8422 fps step and policy inference: 6510 fps total: 4991 epoch: 130/500\n",
            "saving next best rewards:  [18.073238]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 8676 fps step and policy inference: 6740 fps total: 5128 epoch: 131/500\n",
            "saving next best rewards:  [18.15825]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9110 fps step and policy inference: 7074 fps total: 5314 epoch: 132/500\n",
            "saving next best rewards:  [18.248512]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 11077 fps step and policy inference: 8658 fps total: 6168 epoch: 133/500\n",
            "saving next best rewards:  [18.266026]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9061 fps step and policy inference: 6928 fps total: 5237 epoch: 134/500\n",
            "saving next best rewards:  [18.300531]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9847 fps step and policy inference: 7706 fps total: 5670 epoch: 135/500\n",
            "saving next best rewards:  [18.341106]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9361 fps step and policy inference: 7363 fps total: 5483 epoch: 136/500\n",
            "saving next best rewards:  [18.422405]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 10995 fps step and policy inference: 8631 fps total: 6157 epoch: 137/500\n",
            "saving next best rewards:  [18.43818]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 7884 fps step and policy inference: 6049 fps total: 4721 epoch: 138/500\n",
            "saving next best rewards:  [18.514875]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9436 fps step and policy inference: 7247 fps total: 5417 epoch: 139/500\n",
            "saving next best rewards:  [18.558983]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 8176 fps step and policy inference: 6304 fps total: 4868 epoch: 140/500\n",
            "saving next best rewards:  [18.633701]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 8744 fps step and policy inference: 6844 fps total: 5195 epoch: 141/500\n",
            "saving next best rewards:  [18.684343]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 8721 fps step and policy inference: 6736 fps total: 5128 epoch: 142/500\n",
            "saving next best rewards:  [18.710203]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 7351 fps step and policy inference: 5633 fps total: 4460 epoch: 143/500\n",
            "saving next best rewards:  [18.771118]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 8528 fps step and policy inference: 6561 fps total: 5023 epoch: 144/500\n",
            "saving next best rewards:  [18.797619]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 8524 fps step and policy inference: 6670 fps total: 5088 epoch: 145/500\n",
            "saving next best rewards:  [18.80793]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9660 fps step and policy inference: 7600 fps total: 5610 epoch: 146/500\n",
            "saving next best rewards:  [18.8549]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 10568 fps step and policy inference: 8243 fps total: 5959 epoch: 147/500\n",
            "saving next best rewards:  [18.85635]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9410 fps step and policy inference: 7404 fps total: 5498 epoch: 148/500\n",
            "saving next best rewards:  [18.89416]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9399 fps step and policy inference: 7303 fps total: 5441 epoch: 149/500\n",
            "fps step: 10094 fps step and policy inference: 7898 fps total: 5774 epoch: 150/500\n",
            "fps step: 8099 fps step and policy inference: 6183 fps total: 4801 epoch: 151/500\n",
            "saving next best rewards:  [18.924257]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 8548 fps step and policy inference: 6640 fps total: 5067 epoch: 152/500\n",
            "saving next best rewards:  [18.977512]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9955 fps step and policy inference: 7725 fps total: 5677 epoch: 153/500\n",
            "saving next best rewards:  [18.977736]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 8966 fps step and policy inference: 6983 fps total: 5262 epoch: 154/500\n",
            "saving next best rewards:  [19.017935]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 8207 fps step and policy inference: 6414 fps total: 4934 epoch: 155/500\n",
            "saving next best rewards:  [19.05534]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 8936 fps step and policy inference: 6991 fps total: 5272 epoch: 156/500\n",
            "saving next best rewards:  [19.089886]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 8777 fps step and policy inference: 6811 fps total: 5169 epoch: 157/500\n",
            "saving next best rewards:  [19.134487]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9019 fps step and policy inference: 6987 fps total: 5270 epoch: 158/500\n",
            "saving next best rewards:  [19.158789]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 8764 fps step and policy inference: 6667 fps total: 5086 epoch: 159/500\n",
            "saving next best rewards:  [19.165627]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9951 fps step and policy inference: 7775 fps total: 5702 epoch: 160/500\n",
            "saving next best rewards:  [19.169094]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 8993 fps step and policy inference: 6962 fps total: 5259 epoch: 161/500\n",
            "fps step: 9070 fps step and policy inference: 7091 fps total: 5328 epoch: 162/500\n",
            "saving next best rewards:  [19.184063]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9192 fps step and policy inference: 7157 fps total: 5363 epoch: 163/500\n",
            "saving next best rewards:  [19.194647]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 8663 fps step and policy inference: 6611 fps total: 5053 epoch: 164/500\n",
            "saving next best rewards:  [19.208666]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9517 fps step and policy inference: 7272 fps total: 5428 epoch: 165/500\n",
            "saving next best rewards:  [19.232168]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9693 fps step and policy inference: 7577 fps total: 5601 epoch: 166/500\n",
            "saving next best rewards:  [19.24507]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9494 fps step and policy inference: 7440 fps total: 5525 epoch: 167/500\n",
            "fps step: 8207 fps step and policy inference: 6484 fps total: 4975 epoch: 168/500\n",
            "fps step: 9580 fps step and policy inference: 7704 fps total: 5667 epoch: 169/500\n",
            "saving next best rewards:  [19.268833]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 8058 fps step and policy inference: 6160 fps total: 4784 epoch: 170/500\n",
            "saving next best rewards:  [19.296295]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 8166 fps step and policy inference: 6303 fps total: 4871 epoch: 171/500\n",
            "saving next best rewards:  [19.298754]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 8982 fps step and policy inference: 6908 fps total: 5222 epoch: 172/500\n",
            "saving next best rewards:  [19.302809]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9467 fps step and policy inference: 7279 fps total: 5435 epoch: 173/500\n",
            "saving next best rewards:  [19.30978]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9246 fps step and policy inference: 7169 fps total: 5369 epoch: 174/500\n",
            "saving next best rewards:  [19.326975]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 10076 fps step and policy inference: 7859 fps total: 5753 epoch: 175/500\n",
            "fps step: 9859 fps step and policy inference: 7744 fps total: 5691 epoch: 176/500\n",
            "saving next best rewards:  [19.337345]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 7263 fps step and policy inference: 5574 fps total: 4422 epoch: 177/500\n",
            "saving next best rewards:  [19.387865]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9523 fps step and policy inference: 7473 fps total: 5542 epoch: 178/500\n",
            "saving next best rewards:  [19.40414]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 10390 fps step and policy inference: 7931 fps total: 5789 epoch: 179/500\n",
            "fps step: 8643 fps step and policy inference: 6564 fps total: 5018 epoch: 180/500\n",
            "saving next best rewards:  [19.411938]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 8519 fps step and policy inference: 6466 fps total: 4967 epoch: 181/500\n",
            "saving next best rewards:  [19.42364]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 8613 fps step and policy inference: 6725 fps total: 5120 epoch: 182/500\n",
            "saving next best rewards:  [19.43238]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9221 fps step and policy inference: 6992 fps total: 5274 epoch: 183/500\n",
            "fps step: 9661 fps step and policy inference: 7591 fps total: 5605 epoch: 184/500\n",
            "saving next best rewards:  [19.434416]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9686 fps step and policy inference: 7670 fps total: 5648 epoch: 185/500\n",
            "fps step: 7513 fps step and policy inference: 5723 fps total: 4518 epoch: 186/500\n",
            "fps step: 9274 fps step and policy inference: 7365 fps total: 5484 epoch: 187/500\n",
            "fps step: 10389 fps step and policy inference: 8195 fps total: 5927 epoch: 188/500\n",
            "saving next best rewards:  [19.442959]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 8425 fps step and policy inference: 6418 fps total: 4941 epoch: 189/500\n",
            "fps step: 9669 fps step and policy inference: 7565 fps total: 5592 epoch: 190/500\n",
            "fps step: 9014 fps step and policy inference: 7024 fps total: 5292 epoch: 191/500\n",
            "saving next best rewards:  [19.446276]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 8926 fps step and policy inference: 7021 fps total: 5288 epoch: 192/500\n",
            "fps step: 8973 fps step and policy inference: 6995 fps total: 5275 epoch: 193/500\n",
            "fps step: 10528 fps step and policy inference: 8326 fps total: 6001 epoch: 194/500\n",
            "fps step: 10035 fps step and policy inference: 7859 fps total: 5744 epoch: 195/500\n",
            "fps step: 10435 fps step and policy inference: 8334 fps total: 6003 epoch: 196/500\n",
            "fps step: 11058 fps step and policy inference: 8694 fps total: 6182 epoch: 197/500\n",
            "fps step: 8768 fps step and policy inference: 6897 fps total: 5215 epoch: 198/500\n",
            "fps step: 8938 fps step and policy inference: 6974 fps total: 5265 epoch: 199/500\n",
            "fps step: 9062 fps step and policy inference: 7196 fps total: 5383 epoch: 200/500\n",
            "fps step: 8552 fps step and policy inference: 6588 fps total: 5041 epoch: 201/500\n",
            "fps step: 8479 fps step and policy inference: 6840 fps total: 5188 epoch: 202/500\n",
            "fps step: 9859 fps step and policy inference: 7567 fps total: 5594 epoch: 203/500\n",
            "fps step: 10622 fps step and policy inference: 8316 fps total: 5994 epoch: 204/500\n",
            "fps step: 9479 fps step and policy inference: 7558 fps total: 5588 epoch: 205/500\n",
            "fps step: 8119 fps step and policy inference: 6379 fps total: 4917 epoch: 206/500\n",
            "saving next best rewards:  [19.467575]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 8336 fps step and policy inference: 6555 fps total: 5020 epoch: 207/500\n",
            "fps step: 9968 fps step and policy inference: 7796 fps total: 5711 epoch: 208/500\n",
            "fps step: 8218 fps step and policy inference: 6257 fps total: 4841 epoch: 209/500\n",
            "fps step: 9383 fps step and policy inference: 7348 fps total: 5472 epoch: 210/500\n",
            "fps step: 10277 fps step and policy inference: 8035 fps total: 5843 epoch: 211/500\n",
            "fps step: 9323 fps step and policy inference: 7274 fps total: 5425 epoch: 212/500\n",
            "fps step: 9817 fps step and policy inference: 7792 fps total: 5712 epoch: 213/500\n",
            "fps step: 8785 fps step and policy inference: 7013 fps total: 5282 epoch: 214/500\n",
            "saving next best rewards:  [19.483074]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9717 fps step and policy inference: 7670 fps total: 5650 epoch: 215/500\n",
            "saving next best rewards:  [19.508457]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 8558 fps step and policy inference: 6772 fps total: 5144 epoch: 216/500\n",
            "fps step: 9336 fps step and policy inference: 7173 fps total: 5374 epoch: 217/500\n",
            "fps step: 10327 fps step and policy inference: 8188 fps total: 5916 epoch: 218/500\n",
            "saving next best rewards:  [19.522772]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 8374 fps step and policy inference: 6444 fps total: 4955 epoch: 219/500\n",
            "saving next best rewards:  [19.536459]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9765 fps step and policy inference: 7666 fps total: 5647 epoch: 220/500\n",
            "saving next best rewards:  [19.550224]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 10088 fps step and policy inference: 7869 fps total: 5748 epoch: 221/500\n",
            "saving next best rewards:  [19.554722]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 8023 fps step and policy inference: 6144 fps total: 4776 epoch: 222/500\n",
            "fps step: 7912 fps step and policy inference: 6072 fps total: 4732 epoch: 223/500\n",
            "fps step: 9776 fps step and policy inference: 7747 fps total: 5692 epoch: 224/500\n",
            "fps step: 9531 fps step and policy inference: 7503 fps total: 5559 epoch: 225/500\n",
            "fps step: 11245 fps step and policy inference: 8890 fps total: 6295 epoch: 226/500\n",
            "fps step: 8614 fps step and policy inference: 6837 fps total: 5179 epoch: 227/500\n",
            "saving next best rewards:  [19.56048]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 8639 fps step and policy inference: 6623 fps total: 5060 epoch: 228/500\n",
            "fps step: 10255 fps step and policy inference: 8152 fps total: 5904 epoch: 229/500\n",
            "saving next best rewards:  [19.578505]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9311 fps step and policy inference: 7366 fps total: 5484 epoch: 230/500\n",
            "saving next best rewards:  [19.587627]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 10272 fps step and policy inference: 7938 fps total: 5791 epoch: 231/500\n",
            "saving next best rewards:  [19.591751]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9580 fps step and policy inference: 7644 fps total: 5634 epoch: 232/500\n",
            "saving next best rewards:  [19.605936]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9287 fps step and policy inference: 7137 fps total: 5355 epoch: 233/500\n",
            "saving next best rewards:  [19.607738]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 10730 fps step and policy inference: 8341 fps total: 6004 epoch: 234/500\n",
            "saving next best rewards:  [19.61166]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9623 fps step and policy inference: 7444 fps total: 5524 epoch: 235/500\n",
            "saving next best rewards:  [19.617258]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 8015 fps step and policy inference: 6184 fps total: 4795 epoch: 236/500\n",
            "saving next best rewards:  [19.617605]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9876 fps step and policy inference: 7692 fps total: 5656 epoch: 237/500\n",
            "saving next best rewards:  [19.628962]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9867 fps step and policy inference: 7549 fps total: 5575 epoch: 238/500\n",
            "saving next best rewards:  [19.636345]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9527 fps step and policy inference: 7438 fps total: 5528 epoch: 239/500\n",
            "fps step: 9985 fps step and policy inference: 7994 fps total: 5823 epoch: 240/500\n",
            "fps step: 8558 fps step and policy inference: 6764 fps total: 5137 epoch: 241/500\n",
            "fps step: 8772 fps step and policy inference: 6984 fps total: 5267 epoch: 242/500\n",
            "fps step: 8608 fps step and policy inference: 6675 fps total: 5089 epoch: 243/500\n",
            "fps step: 9347 fps step and policy inference: 7393 fps total: 5494 epoch: 244/500\n",
            "fps step: 10408 fps step and policy inference: 8108 fps total: 5883 epoch: 245/500\n",
            "fps step: 9886 fps step and policy inference: 7787 fps total: 5714 epoch: 246/500\n",
            "fps step: 9687 fps step and policy inference: 7622 fps total: 5624 epoch: 247/500\n",
            "fps step: 10836 fps step and policy inference: 8430 fps total: 6054 epoch: 248/500\n",
            "fps step: 8733 fps step and policy inference: 6692 fps total: 5099 epoch: 249/500\n",
            "fps step: 8717 fps step and policy inference: 6969 fps total: 5259 epoch: 250/500\n",
            "fps step: 8695 fps step and policy inference: 6664 fps total: 5077 epoch: 251/500\n",
            "fps step: 9096 fps step and policy inference: 7127 fps total: 5350 epoch: 252/500\n",
            "fps step: 9969 fps step and policy inference: 7684 fps total: 5652 epoch: 253/500\n",
            "fps step: 10101 fps step and policy inference: 7967 fps total: 5799 epoch: 254/500\n",
            "fps step: 10228 fps step and policy inference: 8123 fps total: 5892 epoch: 255/500\n",
            "fps step: 9103 fps step and policy inference: 7321 fps total: 5457 epoch: 256/500\n",
            "fps step: 9583 fps step and policy inference: 7686 fps total: 5661 epoch: 257/500\n",
            "fps step: 8855 fps step and policy inference: 7073 fps total: 5315 epoch: 258/500\n",
            "fps step: 8984 fps step and policy inference: 7099 fps total: 5335 epoch: 259/500\n",
            "fps step: 9831 fps step and policy inference: 7635 fps total: 5632 epoch: 260/500\n",
            "fps step: 8767 fps step and policy inference: 6868 fps total: 5200 epoch: 261/500\n",
            "fps step: 9661 fps step and policy inference: 7428 fps total: 5521 epoch: 262/500\n",
            "fps step: 10817 fps step and policy inference: 8611 fps total: 6143 epoch: 263/500\n",
            "fps step: 9661 fps step and policy inference: 7580 fps total: 5600 epoch: 264/500\n",
            "fps step: 10069 fps step and policy inference: 8089 fps total: 5875 epoch: 265/500\n",
            "fps step: 9988 fps step and policy inference: 7651 fps total: 5632 epoch: 266/500\n",
            "fps step: 9342 fps step and policy inference: 7323 fps total: 5459 epoch: 267/500\n",
            "fps step: 8443 fps step and policy inference: 6704 fps total: 5105 epoch: 268/500\n",
            "fps step: 9867 fps step and policy inference: 7620 fps total: 5617 epoch: 269/500\n",
            "fps step: 8991 fps step and policy inference: 7107 fps total: 5338 epoch: 270/500\n",
            "fps step: 8308 fps step and policy inference: 6379 fps total: 4916 epoch: 271/500\n",
            "fps step: 8884 fps step and policy inference: 7002 fps total: 5282 epoch: 272/500\n",
            "fps step: 10481 fps step and policy inference: 8171 fps total: 5916 epoch: 273/500\n",
            "fps step: 9349 fps step and policy inference: 7511 fps total: 5557 epoch: 274/500\n",
            "fps step: 8598 fps step and policy inference: 6644 fps total: 5072 epoch: 275/500\n",
            "fps step: 10002 fps step and policy inference: 7847 fps total: 5745 epoch: 276/500\n",
            "fps step: 9773 fps step and policy inference: 7588 fps total: 5609 epoch: 277/500\n",
            "fps step: 10620 fps step and policy inference: 8421 fps total: 6045 epoch: 278/500\n",
            "fps step: 10199 fps step and policy inference: 7942 fps total: 5799 epoch: 279/500\n",
            "fps step: 7878 fps step and policy inference: 5983 fps total: 4677 epoch: 280/500\n",
            "fps step: 8014 fps step and policy inference: 6084 fps total: 4732 epoch: 281/500\n",
            "fps step: 8508 fps step and policy inference: 6554 fps total: 5021 epoch: 282/500\n",
            "fps step: 10028 fps step and policy inference: 7769 fps total: 5700 epoch: 283/500\n",
            "fps step: 8880 fps step and policy inference: 7013 fps total: 5283 epoch: 284/500\n",
            "fps step: 9593 fps step and policy inference: 7601 fps total: 5612 epoch: 285/500\n",
            "fps step: 8361 fps step and policy inference: 6442 fps total: 4955 epoch: 286/500\n",
            "fps step: 10058 fps step and policy inference: 7877 fps total: 5762 epoch: 287/500\n",
            "fps step: 9509 fps step and policy inference: 7606 fps total: 5612 epoch: 288/500\n",
            "fps step: 9549 fps step and policy inference: 7587 fps total: 5604 epoch: 289/500\n",
            "fps step: 10914 fps step and policy inference: 8646 fps total: 6159 epoch: 290/500\n",
            "fps step: 7823 fps step and policy inference: 5951 fps total: 4658 epoch: 291/500\n",
            "fps step: 8512 fps step and policy inference: 6535 fps total: 5007 epoch: 292/500\n",
            "fps step: 10715 fps step and policy inference: 8424 fps total: 6053 epoch: 293/500\n",
            "fps step: 9755 fps step and policy inference: 7589 fps total: 5606 epoch: 294/500\n",
            "fps step: 9484 fps step and policy inference: 7522 fps total: 5568 epoch: 295/500\n",
            "fps step: 8462 fps step and policy inference: 6432 fps total: 4951 epoch: 296/500\n",
            "fps step: 10158 fps step and policy inference: 8129 fps total: 5893 epoch: 297/500\n",
            "fps step: 10065 fps step and policy inference: 7917 fps total: 5770 epoch: 298/500\n",
            "fps step: 8776 fps step and policy inference: 6764 fps total: 5141 epoch: 299/500\n",
            "fps step: 9717 fps step and policy inference: 7524 fps total: 5569 epoch: 300/500\n",
            "fps step: 8312 fps step and policy inference: 6452 fps total: 4960 epoch: 301/500\n",
            "fps step: 9235 fps step and policy inference: 7296 fps total: 5443 epoch: 302/500\n",
            "fps step: 8778 fps step and policy inference: 6927 fps total: 5236 epoch: 303/500\n",
            "fps step: 10154 fps step and policy inference: 8041 fps total: 5849 epoch: 304/500\n",
            "fps step: 7919 fps step and policy inference: 6273 fps total: 4843 epoch: 305/500\n",
            "fps step: 9274 fps step and policy inference: 7190 fps total: 5383 epoch: 306/500\n",
            "fps step: 9363 fps step and policy inference: 7346 fps total: 5471 epoch: 307/500\n",
            "fps step: 9347 fps step and policy inference: 7365 fps total: 5481 epoch: 308/500\n",
            "fps step: 10251 fps step and policy inference: 8071 fps total: 5862 epoch: 309/500\n",
            "fps step: 8079 fps step and policy inference: 6253 fps total: 4841 epoch: 310/500\n",
            "fps step: 9708 fps step and policy inference: 7663 fps total: 5643 epoch: 311/500\n",
            "fps step: 9461 fps step and policy inference: 7463 fps total: 5535 epoch: 312/500\n",
            "fps step: 8929 fps step and policy inference: 6971 fps total: 5256 epoch: 313/500\n",
            "fps step: 10609 fps step and policy inference: 8309 fps total: 5988 epoch: 314/500\n",
            "fps step: 8276 fps step and policy inference: 6336 fps total: 4888 epoch: 315/500\n",
            "fps step: 10481 fps step and policy inference: 8323 fps total: 5994 epoch: 316/500\n",
            "saving next best rewards:  [19.644829]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9005 fps step and policy inference: 6899 fps total: 5218 epoch: 317/500\n",
            "saving next best rewards:  [19.66227]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9450 fps step and policy inference: 7476 fps total: 5543 epoch: 318/500\n",
            "saving next best rewards:  [19.682032]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9613 fps step and policy inference: 7590 fps total: 5604 epoch: 319/500\n",
            "saving next best rewards:  [19.700638]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9262 fps step and policy inference: 7052 fps total: 5307 epoch: 320/500\n",
            "saving next best rewards:  [19.709528]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9928 fps step and policy inference: 7652 fps total: 5642 epoch: 321/500\n",
            "saving next best rewards:  [19.71531]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 7503 fps step and policy inference: 5779 fps total: 4552 epoch: 322/500\n",
            "saving next best rewards:  [19.72733]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 8804 fps step and policy inference: 6715 fps total: 5111 epoch: 323/500\n",
            "fps step: 10129 fps step and policy inference: 7934 fps total: 5790 epoch: 324/500\n",
            "fps step: 8016 fps step and policy inference: 6175 fps total: 4794 epoch: 325/500\n",
            "fps step: 9712 fps step and policy inference: 7605 fps total: 5613 epoch: 326/500\n",
            "fps step: 9359 fps step and policy inference: 7426 fps total: 5511 epoch: 327/500\n",
            "fps step: 10866 fps step and policy inference: 8389 fps total: 6028 epoch: 328/500\n",
            "fps step: 9498 fps step and policy inference: 7354 fps total: 5475 epoch: 329/500\n",
            "fps step: 7873 fps step and policy inference: 6051 fps total: 4714 epoch: 330/500\n",
            "fps step: 8250 fps step and policy inference: 6424 fps total: 4939 epoch: 331/500\n",
            "fps step: 9366 fps step and policy inference: 7266 fps total: 5429 epoch: 332/500\n",
            "fps step: 8507 fps step and policy inference: 6638 fps total: 5069 epoch: 333/500\n",
            "fps step: 9623 fps step and policy inference: 7587 fps total: 5604 epoch: 334/500\n",
            "fps step: 8756 fps step and policy inference: 6813 fps total: 5169 epoch: 335/500\n",
            "saving next best rewards:  [19.72744]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 8868 fps step and policy inference: 6858 fps total: 5197 epoch: 336/500\n",
            "fps step: 9576 fps step and policy inference: 7487 fps total: 5549 epoch: 337/500\n",
            "fps step: 9786 fps step and policy inference: 7745 fps total: 5684 epoch: 338/500\n",
            "saving next best rewards:  [19.733776]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9133 fps step and policy inference: 7178 fps total: 5381 epoch: 339/500\n",
            "fps step: 8417 fps step and policy inference: 6588 fps total: 5041 epoch: 340/500\n",
            "fps step: 9032 fps step and policy inference: 7074 fps total: 5315 epoch: 341/500\n",
            "saving next best rewards:  [19.735119]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9990 fps step and policy inference: 7756 fps total: 5697 epoch: 342/500\n",
            "fps step: 9977 fps step and policy inference: 7903 fps total: 5773 epoch: 343/500\n",
            "saving next best rewards:  [19.736557]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9047 fps step and policy inference: 7128 fps total: 5359 epoch: 344/500\n",
            "saving next best rewards:  [19.739565]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 10085 fps step and policy inference: 7781 fps total: 5707 epoch: 345/500\n",
            "saving next best rewards:  [19.7473]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9674 fps step and policy inference: 7620 fps total: 5623 epoch: 346/500\n",
            "fps step: 10482 fps step and policy inference: 8313 fps total: 5988 epoch: 347/500\n",
            "fps step: 9529 fps step and policy inference: 7641 fps total: 5630 epoch: 348/500\n",
            "fps step: 9467 fps step and policy inference: 7482 fps total: 5545 epoch: 349/500\n",
            "fps step: 9184 fps step and policy inference: 7351 fps total: 5473 epoch: 350/500\n",
            "saving next best rewards:  [19.757431]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "fps step: 9473 fps step and policy inference: 7221 fps total: 5402 epoch: 351/500\n",
            "fps step: 9017 fps step and policy inference: 7019 fps total: 5286 epoch: 352/500\n",
            "fps step: 9684 fps step and policy inference: 7601 fps total: 5605 epoch: 353/500\n",
            "fps step: 8249 fps step and policy inference: 6342 fps total: 4893 epoch: 354/500\n",
            "fps step: 9634 fps step and policy inference: 7563 fps total: 5591 epoch: 355/500\n",
            "fps step: 9181 fps step and policy inference: 7364 fps total: 5479 epoch: 356/500\n",
            "fps step: 9096 fps step and policy inference: 7083 fps total: 5323 epoch: 357/500\n",
            "fps step: 9185 fps step and policy inference: 7352 fps total: 5475 epoch: 358/500\n",
            "fps step: 9205 fps step and policy inference: 7222 fps total: 5399 epoch: 359/500\n",
            "fps step: 9157 fps step and policy inference: 7214 fps total: 5397 epoch: 360/500\n",
            "fps step: 9129 fps step and policy inference: 7066 fps total: 5306 epoch: 361/500\n",
            "fps step: 8610 fps step and policy inference: 6718 fps total: 5114 epoch: 362/500\n",
            "fps step: 9078 fps step and policy inference: 7023 fps total: 5285 epoch: 363/500\n",
            "fps step: 9982 fps step and policy inference: 7789 fps total: 5712 epoch: 364/500\n",
            "fps step: 9125 fps step and policy inference: 7026 fps total: 5288 epoch: 365/500\n",
            "fps step: 9322 fps step and policy inference: 7389 fps total: 5493 epoch: 366/500\n",
            "fps step: 9733 fps step and policy inference: 7591 fps total: 5606 epoch: 367/500\n",
            "fps step: 10021 fps step and policy inference: 7770 fps total: 5695 epoch: 368/500\n",
            "fps step: 10056 fps step and policy inference: 8063 fps total: 5858 epoch: 369/500\n",
            "fps step: 10183 fps step and policy inference: 7984 fps total: 5809 epoch: 370/500\n",
            "fps step: 8693 fps step and policy inference: 6934 fps total: 5241 epoch: 371/500\n",
            "fps step: 9813 fps step and policy inference: 7764 fps total: 5700 epoch: 372/500\n",
            "fps step: 8428 fps step and policy inference: 6586 fps total: 5040 epoch: 373/500\n",
            "fps step: 9225 fps step and policy inference: 7228 fps total: 5402 epoch: 374/500\n",
            "fps step: 8938 fps step and policy inference: 6849 fps total: 5190 epoch: 375/500\n",
            "fps step: 9360 fps step and policy inference: 7202 fps total: 5387 epoch: 376/500\n",
            "fps step: 10249 fps step and policy inference: 8137 fps total: 5890 epoch: 377/500\n",
            "fps step: 10365 fps step and policy inference: 8058 fps total: 5853 epoch: 378/500\n",
            "fps step: 8881 fps step and policy inference: 6907 fps total: 5223 epoch: 379/500\n",
            "fps step: 8890 fps step and policy inference: 6923 fps total: 5228 epoch: 380/500\n",
            "fps step: 9036 fps step and policy inference: 7172 fps total: 5372 epoch: 381/500\n",
            "fps step: 10266 fps step and policy inference: 7892 fps total: 5768 epoch: 382/500\n",
            "fps step: 7861 fps step and policy inference: 5971 fps total: 4668 epoch: 383/500\n",
            "fps step: 10274 fps step and policy inference: 7954 fps total: 5796 epoch: 384/500\n",
            "fps step: 9462 fps step and policy inference: 7491 fps total: 5552 epoch: 385/500\n",
            "fps step: 9322 fps step and policy inference: 7290 fps total: 5439 epoch: 386/500\n",
            "fps step: 8802 fps step and policy inference: 6833 fps total: 5183 epoch: 387/500\n",
            "fps step: 8684 fps step and policy inference: 6771 fps total: 5146 epoch: 388/500\n",
            "fps step: 9851 fps step and policy inference: 7789 fps total: 5713 epoch: 389/500\n",
            "fps step: 9188 fps step and policy inference: 7134 fps total: 5350 epoch: 390/500\n",
            "fps step: 10023 fps step and policy inference: 7921 fps total: 5783 epoch: 391/500\n",
            "fps step: 9423 fps step and policy inference: 7388 fps total: 5488 epoch: 392/500\n",
            "fps step: 9079 fps step and policy inference: 7128 fps total: 5345 epoch: 393/500\n",
            "fps step: 10581 fps step and policy inference: 8278 fps total: 5964 epoch: 394/500\n",
            "fps step: 9424 fps step and policy inference: 7269 fps total: 5426 epoch: 395/500\n",
            "fps step: 8442 fps step and policy inference: 6482 fps total: 4977 epoch: 396/500\n",
            "fps step: 8205 fps step and policy inference: 6439 fps total: 4951 epoch: 397/500\n",
            "fps step: 11269 fps step and policy inference: 8894 fps total: 6284 epoch: 398/500\n",
            "fps step: 9729 fps step and policy inference: 7552 fps total: 5586 epoch: 399/500\n",
            "fps step: 7201 fps step and policy inference: 5514 fps total: 4385 epoch: 400/500\n",
            "fps step: 10137 fps step and policy inference: 8104 fps total: 5873 epoch: 401/500\n",
            "fps step: 7655 fps step and policy inference: 5755 fps total: 4537 epoch: 402/500\n",
            "fps step: 7720 fps step and policy inference: 5969 fps total: 4669 epoch: 403/500\n",
            "fps step: 9109 fps step and policy inference: 7186 fps total: 5381 epoch: 404/500\n",
            "fps step: 9183 fps step and policy inference: 7040 fps total: 5303 epoch: 405/500\n",
            "fps step: 8864 fps step and policy inference: 6824 fps total: 5177 epoch: 406/500\n",
            "fps step: 9841 fps step and policy inference: 7624 fps total: 5621 epoch: 407/500\n",
            "fps step: 9575 fps step and policy inference: 7304 fps total: 5443 epoch: 408/500\n",
            "fps step: 9316 fps step and policy inference: 7307 fps total: 5448 epoch: 409/500\n",
            "fps step: 9275 fps step and policy inference: 7266 fps total: 5428 epoch: 410/500\n",
            "fps step: 9866 fps step and policy inference: 7874 fps total: 5761 epoch: 411/500\n",
            "fps step: 9724 fps step and policy inference: 7604 fps total: 5609 epoch: 412/500\n",
            "fps step: 9952 fps step and policy inference: 7872 fps total: 5750 epoch: 413/500\n",
            "fps step: 8739 fps step and policy inference: 6743 fps total: 5130 epoch: 414/500\n",
            "fps step: 10820 fps step and policy inference: 8493 fps total: 6084 epoch: 415/500\n",
            "fps step: 9476 fps step and policy inference: 7542 fps total: 5576 epoch: 416/500\n",
            "fps step: 9158 fps step and policy inference: 7168 fps total: 5371 epoch: 417/500\n",
            "fps step: 8887 fps step and policy inference: 7077 fps total: 5319 epoch: 418/500\n",
            "fps step: 9262 fps step and policy inference: 7242 fps total: 5408 epoch: 419/500\n",
            "fps step: 10031 fps step and policy inference: 7874 fps total: 5759 epoch: 420/500\n",
            "fps step: 9187 fps step and policy inference: 7105 fps total: 5336 epoch: 421/500\n",
            "fps step: 9942 fps step and policy inference: 7723 fps total: 5675 epoch: 422/500\n",
            "fps step: 9981 fps step and policy inference: 7808 fps total: 5720 epoch: 423/500\n",
            "fps step: 9955 fps step and policy inference: 7818 fps total: 5723 epoch: 424/500\n",
            "fps step: 8182 fps step and policy inference: 6408 fps total: 4934 epoch: 425/500\n",
            "fps step: 8547 fps step and policy inference: 6618 fps total: 5053 epoch: 426/500\n",
            "fps step: 9124 fps step and policy inference: 7196 fps total: 5388 epoch: 427/500\n",
            "fps step: 8625 fps step and policy inference: 6674 fps total: 5089 epoch: 428/500\n",
            "fps step: 9528 fps step and policy inference: 7435 fps total: 5512 epoch: 429/500\n",
            "fps step: 8211 fps step and policy inference: 6358 fps total: 4903 epoch: 430/500\n",
            "fps step: 8504 fps step and policy inference: 6663 fps total: 5083 epoch: 431/500\n",
            "fps step: 10131 fps step and policy inference: 8095 fps total: 5877 epoch: 432/500\n",
            "fps step: 8699 fps step and policy inference: 6728 fps total: 5120 epoch: 433/500\n",
            "fps step: 8860 fps step and policy inference: 6886 fps total: 5211 epoch: 434/500\n",
            "fps step: 8941 fps step and policy inference: 6954 fps total: 5250 epoch: 435/500\n",
            "fps step: 8965 fps step and policy inference: 6825 fps total: 5175 epoch: 436/500\n",
            "fps step: 9038 fps step and policy inference: 6970 fps total: 5260 epoch: 437/500\n",
            "fps step: 10194 fps step and policy inference: 8022 fps total: 5837 epoch: 438/500\n",
            "fps step: 9488 fps step and policy inference: 7505 fps total: 5551 epoch: 439/500\n",
            "fps step: 10083 fps step and policy inference: 7849 fps total: 5745 epoch: 440/500\n",
            "fps step: 9214 fps step and policy inference: 7201 fps total: 5390 epoch: 441/500\n",
            "fps step: 10473 fps step and policy inference: 8341 fps total: 6004 epoch: 442/500\n",
            "fps step: 9396 fps step and policy inference: 7395 fps total: 5499 epoch: 443/500\n",
            "fps step: 10055 fps step and policy inference: 8069 fps total: 5862 epoch: 444/500\n",
            "fps step: 8934 fps step and policy inference: 6996 fps total: 5273 epoch: 445/500\n",
            "fps step: 8951 fps step and policy inference: 7031 fps total: 5292 epoch: 446/500\n",
            "fps step: 7595 fps step and policy inference: 5877 fps total: 4609 epoch: 447/500\n",
            "fps step: 9329 fps step and policy inference: 7407 fps total: 5505 epoch: 448/500\n",
            "fps step: 9257 fps step and policy inference: 7228 fps total: 5406 epoch: 449/500\n",
            "fps step: 8829 fps step and policy inference: 6824 fps total: 5172 epoch: 450/500\n",
            "fps step: 10002 fps step and policy inference: 7821 fps total: 5731 epoch: 451/500\n",
            "fps step: 9345 fps step and policy inference: 7353 fps total: 5475 epoch: 452/500\n",
            "fps step: 9631 fps step and policy inference: 7628 fps total: 5623 epoch: 453/500\n",
            "fps step: 9948 fps step and policy inference: 7781 fps total: 5709 epoch: 454/500\n",
            "fps step: 8673 fps step and policy inference: 6715 fps total: 5112 epoch: 455/500\n",
            "fps step: 11702 fps step and policy inference: 9220 fps total: 6445 epoch: 456/500\n",
            "fps step: 9455 fps step and policy inference: 7429 fps total: 5513 epoch: 457/500\n",
            "fps step: 8438 fps step and policy inference: 6536 fps total: 5008 epoch: 458/500\n",
            "fps step: 9250 fps step and policy inference: 7240 fps total: 5412 epoch: 459/500\n",
            "fps step: 9140 fps step and policy inference: 7105 fps total: 5327 epoch: 460/500\n",
            "fps step: 9233 fps step and policy inference: 7256 fps total: 5431 epoch: 461/500\n",
            "fps step: 7204 fps step and policy inference: 5524 fps total: 4394 epoch: 462/500\n",
            "fps step: 9044 fps step and policy inference: 7245 fps total: 5413 epoch: 463/500\n",
            "fps step: 9307 fps step and policy inference: 7200 fps total: 5381 epoch: 464/500\n",
            "fps step: 11346 fps step and policy inference: 8914 fps total: 6295 epoch: 465/500\n",
            "fps step: 9451 fps step and policy inference: 7264 fps total: 5426 epoch: 466/500\n",
            "fps step: 8665 fps step and policy inference: 6620 fps total: 5061 epoch: 467/500\n",
            "fps step: 7801 fps step and policy inference: 5960 fps total: 4663 epoch: 468/500\n",
            "fps step: 9465 fps step and policy inference: 7485 fps total: 5551 epoch: 469/500\n",
            "fps step: 9572 fps step and policy inference: 7412 fps total: 5506 epoch: 470/500\n",
            "fps step: 9893 fps step and policy inference: 7773 fps total: 5707 epoch: 471/500\n",
            "fps step: 9099 fps step and policy inference: 7167 fps total: 5368 epoch: 472/500\n",
            "fps step: 8981 fps step and policy inference: 6827 fps total: 5174 epoch: 473/500\n",
            "fps step: 9243 fps step and policy inference: 7059 fps total: 5309 epoch: 474/500\n",
            "fps step: 7665 fps step and policy inference: 5901 fps total: 4627 epoch: 475/500\n",
            "fps step: 7541 fps step and policy inference: 5954 fps total: 4661 epoch: 476/500\n",
            "fps step: 8585 fps step and policy inference: 6646 fps total: 5072 epoch: 477/500\n",
            "fps step: 10771 fps step and policy inference: 8422 fps total: 6035 epoch: 478/500\n",
            "fps step: 9583 fps step and policy inference: 7447 fps total: 5527 epoch: 479/500\n",
            "fps step: 10889 fps step and policy inference: 8614 fps total: 6146 epoch: 480/500\n",
            "fps step: 9537 fps step and policy inference: 7374 fps total: 5479 epoch: 481/500\n",
            "fps step: 8029 fps step and policy inference: 6193 fps total: 4802 epoch: 482/500\n",
            "fps step: 9203 fps step and policy inference: 7209 fps total: 5392 epoch: 483/500\n",
            "fps step: 10002 fps step and policy inference: 7792 fps total: 5714 epoch: 484/500\n",
            "fps step: 9731 fps step and policy inference: 7482 fps total: 5546 epoch: 485/500\n",
            "fps step: 9883 fps step and policy inference: 7762 fps total: 5698 epoch: 486/500\n",
            "fps step: 8984 fps step and policy inference: 7122 fps total: 5340 epoch: 487/500\n",
            "fps step: 9821 fps step and policy inference: 7826 fps total: 5733 epoch: 488/500\n",
            "fps step: 9200 fps step and policy inference: 7139 fps total: 5352 epoch: 489/500\n",
            "fps step: 9063 fps step and policy inference: 7226 fps total: 5398 epoch: 490/500\n",
            "fps step: 8678 fps step and policy inference: 6683 fps total: 5095 epoch: 491/500\n",
            "fps step: 9752 fps step and policy inference: 7756 fps total: 5695 epoch: 492/500\n",
            "fps step: 10208 fps step and policy inference: 8090 fps total: 5870 epoch: 493/500\n",
            "fps step: 8983 fps step and policy inference: 7010 fps total: 5281 epoch: 494/500\n",
            "fps step: 9886 fps step and policy inference: 7772 fps total: 5698 epoch: 495/500\n",
            "fps step: 8691 fps step and policy inference: 6727 fps total: 5121 epoch: 496/500\n",
            "fps step: 8804 fps step and policy inference: 6815 fps total: 5173 epoch: 497/500\n",
            "fps step: 8438 fps step and policy inference: 6575 fps total: 5033 epoch: 498/500\n",
            "fps step: 9069 fps step and policy inference: 7043 fps total: 5299 epoch: 499/500\n",
            "fps step: 9205 fps step and policy inference: 7004 fps total: 5274 epoch: 500/500\n",
            "saving next best rewards:  [19.764378]\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/Pong-v5_envpool.pth'\n",
            "=> saving checkpoint 'runs/Pong-v5_envpool_23-09-07-59/nn/last_Pong-v5_envpoolep500rew[19.764378].pth'\n",
            "MAX EPOCHS NUM!\n"
          ]
        }
      ]
    }
  ]
}